* Start from an empty dataset
clear
* Set the maximum matrix size to 10000
set matsize 10000
* Enable the -pause- command (useful when debugging interactively)
pause on
* Do not stop at --more-- prompts while the do-file runs
set more off

******************************************************************************************
* Name: 	regs-incumbents.do
* Purpose: 	this do file uses merged election data to run regressions for incumbents only
* Input:	ca-election-full.dta
******************************************************************************************

*****************************
* Set locals	
*****************************

* Folder that receives the regression tables; -capture- ignores any error
* from mkdir (e.g. the folder already exists)
local tabs "tabs-incumbents"
capture mkdir "`tabs'"

* Close any log that is still open, then start a fresh plain-text log
capture log close
log using "regs-incumbents.log", replace text

*****************************
* Sample definition
*****************************

use "ca-election-full.dta", clear

describe
summarize

* Keep only observations with a recorded first-election year
* (original note: missing means the first election is after 2010 -- TODO confirm)
keep if first_election != .
count

* Drop names that can't be classified (either ambiguous gender or not in the
* names database); sex==999 appears to be the code for these -- TODO confirm
drop if sex == 999 | sex == .

* Sanity checks: neither key variable may be missing from here on
assert first_election != .
assert sex != .

* Flag as incumbent anyone in office whose occupation string does not contain
* "APPOINTED", as well as anyone in occupation group 32
replace first_incumbent = 1 if (first_inoffice == 1 & strpos(first_occ, "APPOINTED") == 0) | first_occ_group == 32
count

* Numeric county identifier built from the county name
egen county = group(CNTYNAME)

* Regression controls: election-year and county indicators
* (election_year itself is generated later in this file, before any regression runs)
local controls "i.election_year i.county"

* Standard errors are clustered on county
local cluster_var "county"

* The estimation sample is the union of two groups:
*   (1) individuals listed as incumbents in a first election before 2000, excluding
*       those whose occupation contains "APPOINTED" and races with a single candidate;
*       limited to before 2000 because the initial elections of all elected incumbents
*       should be observed from 2000 on
*   (2) individuals not flagged as incumbents who won their first election, ran again,
*       and whose second election is in 2010 or earlier
keep if (first_incumbent==1 & first_election<2000 & strpos(first_occ, "APPOINTED")==0 & first_num_cand!=1) ///
	| (first_incumbent!=1 & first_elected==1 & run_atall==1 & second_election<=2010)

**************************
* Variable definition
*************************

* Loss indicator (1 = lost, 0 = won):
*   - incumbents (first_incumbent==1): based on first_elected
*   - non-incumbent first-time winners (first_incumbent==0): based on win
gen second_loss = 1 - first_elected if first_incumbent==1 & inlist(first_elected, 0, 1)
replace second_loss = 1 - win if first_incumbent==0 & inlist(win, 0, 1)

* Outcome: run again
*   - incumbents: run_atall
*   - non-incumbents: any nonzero, non-missing value of second_run4
gen second_run_atall = run_atall if first_incumbent==1 & inlist(run_atall, 0, 1)
replace second_run_atall = (second_run4!=0) if first_incumbent==0 & second_run4!=.

gen female_second_loss = second_loss*female

* Running variable: first_margin for incumbents, margin_next otherwise;
* a margin of exactly 1 is set to missing
gen second_margin = cond(first_incumbent==1, first_margin, margin_next) if inlist(first_incumbent, 0, 1)
replace second_margin = . if second_margin==1

* Side-of-threshold indicators (a missing margin gives 0 on both)
gen right = second_margin>0 & second_margin!=.
gen left = second_margin<0

* Squared margin, then linear and quadratic margin terms on each side
gen marginsq = second_margin*second_margin
foreach side in left right {
	gen second_margin_`side' = second_margin*`side'
	gen margin_`side'sq = marginsq*`side'
}

* Female interactions with the margin terms
* (the unsuffixed names refer to the left side of the threshold)
gen female_margin = female*second_margin_left
gen female_margin_right = female*second_margin_right
gen female_marginsq = female*margin_leftsq
gen female_margin_rightsq = female*margin_rightsq

* Election year used as a control: first election for incumbents, second otherwise
gen election_year = cond(first_incumbent==1, first_election, second_election) if inlist(first_incumbent, 0, 1)

* The first_* inputs are not used after this point
drop first*

****************************
* Parametric regressions
****************************

* Regressor lists:
*   main_vars       loss indicator plus linear margin on each side
*   main_vars_sq    main_vars plus quadratic margin terms
*   main_vars_f     main_vars plus female and its interactions
*   main_vars_f_sq  main_vars_f plus quadratic terms and their female interactions
local main_vars "second_loss second_margin_left second_margin_right"
local main_vars_sq "`main_vars' margin_leftsq margin_rightsq"
local main_vars_f "female_second_loss second_loss female second_margin_left second_margin_right female_margin female_margin_right"
local main_vars_f_sq "`main_vars_f' margin_leftsq margin_rightsq female_marginsq female_margin_rightsq"

foreach y in second_run_atall {

	* Pooled sample, then women, then men; within each, linear before quadratic.
	* The first column replaces the table file, later columns are appended.
	local mode "replace"
	foreach grp in All Female Male {
		local subsample ""
		if "`grp'"=="Female" local subsample "if female==1"
		if "`grp'"=="Male" local subsample "if female==0"

		foreach spec in main_vars main_vars_sq {
			reg `y' ``spec'' `controls' `subsample', cluster(`cluster_var')
			* Mean of the outcome among winners in the estimation sample
			summ `y' if second_loss==0 & e(sample)
			outreg2 using `tabs'/rdpara-`y'.xls, excel ctitle("`grp'") dec(3) addstat(Mean Winners, r(mean)) label `mode'
			local mode "append"
		}
	}

	* Compare male/female coefficients on second_loss, linear then quadratic:
	* the two models are estimated separately and combined with suest
	foreach spec in main_vars main_vars_sq {
		reg `y' ``spec'' `controls' if female==1
		est store regf
		reg `y' ``spec'' `controls' if female==0
		est store regm
		suest regf regm, cluster(`cluster_var')
		test [regf_mean]second_loss=[regm_mean]second_loss
	}

	* Pooled models with female interactions, linear then quadratic
	local mode "replace"
	foreach spec in main_vars_f main_vars_f_sq {
		reg `y' ``spec'' `controls', cluster(`cluster_var')
		summ `y' if second_loss==0 & e(sample)
		outreg2 using `tabs'/rdpara-`y'-female.xls, excel ctitle("All") dec(3) addstat(Mean Winners, r(mean)) label `mode'
		local mode "append"
	}
}

************************************
* Local linear regression 		
************************************

**********************
* kernel: uniform
* with controls
**********************

foreach y in second_run_atall {

	* Bandwidths from rdrobust (uniform kernel, clustered) for the pooled sample (all),
	* men (m) and women (f). Each is rounded to 3 decimals, and a doubled and a halved
	* version are stored alongside it.
	foreach s in all m f {
		local subsample ""
		if "`s'"=="m" local subsample "if female==0"
		if "`s'"=="f" local subsample "if female==1"

		rdrobust `y' second_margin `subsample', kernel(uni) vce(cluster `cluster_var')
		local bw_`s' = round(e(h_l), 0.001)
		local bw2_`s' = 2*`bw_`s''
		local bwhalf_`s' = 0.5*`bw_`s''
	}

	* Pooled sample: plain and female-interacted specifications at each bandwidth.
	* The baseline bandwidth starts each table file; the others are appended.
	foreach h in bw bw2 bwhalf {

		local range "second_margin>-``h'_all' & second_margin<``h'_all' "

		* NOTE(review): the interaction table labels its first column "Interact threshold"
		* and the remaining ones "Interact all" -- confirm whether this is intended
		local mode "append"
		local tag "Interact all"
		if "`h'"=="bw" {
			local mode "replace"
			local tag "Interact threshold"
		}

		* All
		reg `y' `main_vars' `controls' if  `range', cluster(`cluster_var')
		summ `y' if second_loss==0 & e(sample)
		outreg2 using `tabs'/rdllr-`y'-controls-bygender.xls, excel ctitle("All ``h'_all'") dec(3) label addstat(Mean Winners, r(mean)) `mode'

		* Interaction
		reg `y' `main_vars_f' `controls' if  `range', cluster(`cluster_var')
		summ `y' if second_loss==0 & e(sample)
		outreg2 using `tabs'/rdllr-`y'-controls-interact.xls, excel ctitle("`tag' ``h'_all'") dec(3) label addstat(Mean Winners, r(mean)) `mode'
	}

	* Male: own bandwidth, appended to the by-gender table
	foreach h in bw bw2 bwhalf {

		local range_m "second_margin>-``h'_m' & second_margin<``h'_m' "

		reg `y' `main_vars' `controls' if  `range_m' & female==0, cluster(`cluster_var')
		summ `y' if second_loss==0 & e(sample)
		outreg2 using `tabs'/rdllr-`y'-controls-bygender.xls, excel ctitle("Male ``h'_m'") dec(3) label addstat(Mean Winners, r(mean)) append
	}

	* Female: own bandwidth, appended to the by-gender table, followed by a test
	* that the second_loss coefficient is equal for women and men
	foreach h in bw bw2 bwhalf {

		local range_f "second_margin>-``h'_f' & second_margin<``h'_f' "
		local range_m "second_margin>-``h'_m' & second_margin<``h'_m' "

		reg `y' `main_vars' `controls' if  `range_f' & female==1, cluster(`cluster_var')
		summ `y' if second_loss==0 & e(sample)
		outreg2 using `tabs'/rdllr-`y'-controls-bygender.xls, excel ctitle("Female ``h'_f'") dec(3) label addstat(Mean Winners, r(mean)) append

		* Each gender is re-estimated within its own bandwidth, then combined with suest
		reg `y' `main_vars' `controls' if  `range_f' & female==1
		est store regf
		reg `y' `main_vars' `controls' if  `range_m' & female==0
		est store regm
		suest regf regm, cluster(`cluster_var')
		test [regf_mean]second_loss=[regm_mean]second_loss
	}

}
* NOTE(review): no -log close- is visible after this point; confirm the log opened
* at the top of the file is closed once the do-file finishes
